#Install packages (only those not already present)
panel_pkgs <- c("stargazer", "magrittr", "haven")
panel_missing <- panel_pkgs[!(panel_pkgs %in% installed.packages()[, "Package"])]
if (length(panel_missing) > 0) install.packages(panel_missing)
if(!("plm" %in% installed.packages()[,"Package"])) install.packages("plm")
Panel Data and Time Series in R
Panel Data Models
Loading Packages
library(tidyverse)
library(stargazer)
library(magrittr)
library(haven)
library(plm)
library(wooldridge)
library(estprod)
We want to see the effect of training grants on firm scrap rate.
# Work on our own copy of the wooldridge jtrain data so edits never touch the original
JTRAIN <- data.frame(jtrain)
# Rows with a missing dependent variable (lscrap) cannot be used, so drop them,
# keeping only the columns the models below need
JTRAIN <- JTRAIN %>%
  filter(!is.na(lscrap)) %>%
  select(fcode, year, lscrap, tothrs, d88, d89, grant, grant_1)
# Text summary table of the estimation sample
stargazer(JTRAIN, type = "text")
===================================================
Statistic N Mean St. Dev. Min Max
---------------------------------------------------
fcode 162 416,313.600 3,758.513 410,523 419,483
year 162 1,988.000 0.819 1,987 1,989
lscrap 162 0.394 1.486 -4.605 3.401
tothrs 146 23.712 28.020 0 154
d88 162 0.333 0.473 0 1
d89 162 0.333 0.473 0 1
grant 162 0.179 0.385 0 1
grant_1 162 0.117 0.323 0 1
---------------------------------------------------
Pooled OLS
# Pooled OLS: stack all firm-years and ignore the panel structure.
# index = c(group index, time index) tells plm how the panel is organized.
model_ols <- plm(
  lscrap ~ tothrs + d88 + d89 + grant + grant_1,
  data  = JTRAIN,
  model = "pooling",
  index = c("fcode", "year")
)
summary(model_ols)
Pooling Model
Call:
plm(formula = lscrap ~ tothrs + d88 + d89 + grant + grant_1,
data = JTRAIN, model = "pooling", index = c("fcode", "year"))
Unbalanced Panel: n = 49, T = 2-3, N = 146
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-5.081736 -0.829602 -0.071229 1.078829 3.344532
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
(Intercept) 0.6625028 0.2231918 2.9683 0.003523 **
tothrs -0.0046484 0.0049508 -0.9389 0.349392
d88 -0.2679208 0.3256257 -0.8228 0.412028
d89 -0.5128693 0.3658318 -1.4019 0.163150
grant 0.3800988 0.3758131 1.0114 0.313568
grant_1 0.0848627 0.4491363 0.1889 0.850408
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Total Sum of Squares: 310.97
Residual Sum of Squares: 302.34
R-Squared: 0.027761
Adj. R-Squared: -0.0069617
F-statistic: 0.799506 on 5 and 140 DF, p-value: 0.5518
You will see in the results that only the intercept is statistically significant. We're not interested in that. We're interested in the effect of an additional training hour across firms (i) and over time (t) on the log of the scrap rate. We see here that it is not significant.
First Difference Estimator
#We first do first differences
# First-difference helpers.
# BUG FIX: stats::lag() does not shift the values of a plain (non-ts) vector --
# it only shifts the "tsp" attribute -- so `x - stats::lag(x)` is identically
# zero inside mutate(). That is why the naive FD regression returns all-zero
# residuals and NA coefficients. Shift the *values* by one position instead,
# padding the first element with NA (same behavior as dplyr::lag()).
# NOTE: this masks base lag()/diff() for the rest of the session.
lag <- function(x) c(NA, x)[seq_along(x)]
diff <- function(x) {x - lag(x)}
# Build the first-differenced variables within each firm, then ungroup
JTRAIN <- JTRAIN %>%
  group_by(fcode) %>%
  mutate(
    dlscrap = diff(lscrap),
    dtothrs = diff(tothrs),
    dgrant  = diff(grant)
  ) %>%
  ungroup()
# OLS on the differenced data is the first-difference estimator
model_fd <- lm(dlscrap ~ dtothrs + dgrant, data = JTRAIN)
summary(model_fd)
Call:
lm(formula = dlscrap ~ dtothrs + dgrant, data = JTRAIN)
Residuals:
Min 1Q Median 3Q Max
0 0 0 0 0
Coefficients: (2 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0 0 NaN NaN
dtothrs NA NA NA NA
dgrant NA NA NA NA
Residual standard error: 0 on 145 degrees of freedom
(16 observations deleted due to missingness)
You will notice in the results above that the coefficients are not identified (NA) and the residuals are all zero: the differenced variables are identically zero because `stats::lag()` does not shift the values of a plain vector. With a proper value-shifting lag (e.g. `dplyr::lag()`), the first-difference estimator measures the effect of the change in total training hours from one year to the next for the same firm on the change in the log scrap rate from one year to the next for the same firm.
Fixed Effects Estimator
#you can do this: model_fe <- update(model_ols, model = "within", effect = "individual"
# Fixed effects (within) estimator: demeans each variable within a firm,
# sweeping out time-invariant firm heterogeneity.
model_fe <- plm(
  lscrap ~ tothrs + d88 + d89 + grant + grant_1,
  data   = JTRAIN,
  model  = "within",
  effect = "individual",
  index  = c("fcode", "year")  # c(group index, time index)
)
summary(model_fe)
Oneway (individual) effect Within Model
Call:
plm(formula = lscrap ~ tothrs + d88 + d89 + grant + grant_1,
data = JTRAIN, effect = "individual", model = "within", index = c("fcode",
"year"))
Unbalanced Panel: n = 49, T = 2-3, N = 146
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-2.253927 -0.142083 -0.024063 0.159263 1.412912
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
tothrs -0.0047331 0.0029406 -1.6096 0.11092
d88 -0.0747307 0.1212112 -0.6165 0.53907
d89 -0.2182447 0.1555527 -1.4030 0.16397
grant -0.1175244 0.1811571 -0.6487 0.51812
grant_1 -0.4097214 0.2283540 -1.7942 0.07606 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Total Sum of Squares: 31.521
Residual Sum of Squares: 24.311
R-Squared: 0.22873
Adj. R-Squared: -0.21559
F-statistic: 5.45676 on 5 and 92 DF, p-value: 0.00019242
Our variable of interest, tothrs, shows that an increase in tothrs for the same firm (relative to its mean) has no significant effect on the log scrap rate (relative to its mean). However, receiving a grant in the previous period is statistically significant, with about 41% lower scrap rates.
Random Effects Estimator
# model_re <- update(model_ols, model = "random", random.method = "walhus")
# Random effects (GLS) estimator using the Wallace-Hussain variance components
model_re <- plm(
  lscrap ~ tothrs + d88 + d89 + grant + grant_1,
  data  = JTRAIN,
  model = "random",
  random.method = "walhus",
  index = c("fcode", "year")  # c(group index, time index)
)
summary(model_re)
Oneway (individual) effect Random Effect Model
(Wallace-Hussain's transformation)
Call:
plm(formula = lscrap ~ tothrs + d88 + d89 + grant + grant_1,
data = JTRAIN, model = "random", random.method = "walhus",
index = c("fcode", "year"))
Unbalanced Panel: n = 49, T = 2-3, N = 146
Effects:
var std.dev share
idiosyncratic 0.2553 0.5053 0.118
individual 1.9172 1.3846 0.882
theta:
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.7501 0.7938 0.7938 0.7932 0.7938 0.7938
Residuals:
Min. 1st Qu. Median Mean 3rd Qu. Max.
-2.51427 -0.19917 -0.00172 -0.00019 0.27108 1.59999
Coefficients:
Estimate Std. Error z-value Pr(>|z|)
(Intercept) 0.6638756 0.2166307 3.0646 0.00218 **
tothrs -0.0047381 0.0028041 -1.6897 0.09108 .
d88 -0.0915398 0.1194667 -0.7662 0.44354
d89 -0.2483872 0.1517170 -1.6372 0.10159
grant -0.0741720 0.1751030 -0.4236 0.67186
grant_1 -0.3546065 0.2203972 -1.6089 0.10763
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Total Sum of Squares: 43.404
Residual Sum of Squares: 36.353
R-Squared: 0.16246
Adj. R-Squared: 0.13255
Chisq: 27.1435 on 5 DF, p-value: 5.3487e-05
# The theta parameter is listed in the summary; theta is the random effects parameter
For RE, for each additional hour in tothrs, the scrap rate is lower by about 0.5%. The coefficients on the time dummies and the grants are not significant.
Hausman Test
# Fixed effects estimator
summary(model_fe)
Oneway (individual) effect Within Model
Call:
plm(formula = lscrap ~ tothrs + d88 + d89 + grant + grant_1,
data = JTRAIN, effect = "individual", model = "within", index = c("fcode",
"year"))
Unbalanced Panel: n = 49, T = 2-3, N = 146
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-2.253927 -0.142083 -0.024063 0.159263 1.412912
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
tothrs -0.0047331 0.0029406 -1.6096 0.11092
d88 -0.0747307 0.1212112 -0.6165 0.53907
d89 -0.2182447 0.1555527 -1.4030 0.16397
grant -0.1175244 0.1811571 -0.6487 0.51812
grant_1 -0.4097214 0.2283540 -1.7942 0.07606 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Total Sum of Squares: 31.521
Residual Sum of Squares: 24.311
R-Squared: 0.22873
Adj. R-Squared: -0.21559
F-statistic: 5.45676 on 5 and 92 DF, p-value: 0.00019242
# Random effects estimator
summary(model_re)
Oneway (individual) effect Random Effect Model
(Wallace-Hussain's transformation)
Call:
plm(formula = lscrap ~ tothrs + d88 + d89 + grant + grant_1,
data = JTRAIN, model = "random", random.method = "walhus",
index = c("fcode", "year"))
Unbalanced Panel: n = 49, T = 2-3, N = 146
Effects:
var std.dev share
idiosyncratic 0.2553 0.5053 0.118
individual 1.9172 1.3846 0.882
theta:
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.7501 0.7938 0.7938 0.7932 0.7938 0.7938
Residuals:
Min. 1st Qu. Median Mean 3rd Qu. Max.
-2.51427 -0.19917 -0.00172 -0.00019 0.27108 1.59999
Coefficients:
Estimate Std. Error z-value Pr(>|z|)
(Intercept) 0.6638756 0.2166307 3.0646 0.00218 **
tothrs -0.0047381 0.0028041 -1.6897 0.09108 .
d88 -0.0915398 0.1194667 -0.7662 0.44354
d89 -0.2483872 0.1517170 -1.6372 0.10159
grant -0.0741720 0.1751030 -0.4236 0.67186
grant_1 -0.3546065 0.2203972 -1.6089 0.10763
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Total Sum of Squares: 43.404
Residual Sum of Squares: 36.353
R-Squared: 0.16246
Adj. R-Squared: 0.13255
Chisq: 27.1435 on 5 DF, p-value: 5.3487e-05
# Hausman test for fixed versus random effects.
# H0: both estimators are consistent (RE is also efficient) -> an insignificant
#     statistic favors RE.
# H1: only FE is consistent -> a significant statistic favors FE.
phtest(model_fe, model_re)
Hausman Test
data: lscrap ~ tothrs + d88 + d89 + grant + grant_1
chisq = 1.225, df = 5, p-value = 0.9425
alternative hypothesis: one model is inconsistent
# If the Hausman test statistic is insignificant, use RE estimator because it is efficient
# If the Hausman test statistic is significant, use FE estimator because it is consistent
The results would show that you will choose the RE.
Time Series
#Install packages (only those not already present)
ts_pkgs <- c("tseries", "forecast", "tidyverse")
ts_missing <- ts_pkgs[!(ts_pkgs %in% installed.packages()[, "Package"])]
if (length(ts_missing) > 0) install.packages(ts_missing)
#Load packages
library(tseries)
library(forecast)
library(tidyverse)
Simulating the Data:
# Simulating the Series: AR(1), MA(1), and ARMA(1,1), n = 10000 each.
# BUG FIX: `sd` is a direct argument of arima.sim(), not a component of the
# `model` list; inside the list it was silently ignored (innovation sd = 1).
# Pass it separately so the innovations really have sd = 0.01.
AR1 <- arima.sim(n = 10000, model = list(order = c(1, 0, 0), ar = 0.5), sd = 0.01)
MA1 <- arima.sim(n = 10000, model = list(order = c(0, 0, 1), ma = 0.7), sd = 0.01)
ARMA11 <- arima.sim(n = 10000, model = list(order = c(1, 0, 1), ar = 0.5, ma = 0.5), sd = 0.01)
# Graphing the Three Series
library(ggplot2) # Load ggplot2 for plotting
# Bind the three simulated series into one multivariate time series object
combo <- cbind(AR1, MA1, ARMA11)
# autoplot() draws each series in its own facet
# NOTE(review): the autoplot method for ts objects comes from forecast/ggfortify,
# not ggplot2 itself -- confirm forecast is attached when running this chunk
autoplot(combo, facets = TRUE)+
ggtitle("Time Series Plots")# Showing the PACF and ACF
# Show each simulated series with its ACF and PACF (3x2 grid = 6 panels per
# page; the ARMA plots spill onto a second page)
par(mfrow = c(3, 2))
plot.ts(AR1)
acf(AR1)   # Geometric decay
pacf(AR1)  # Cutoff after the first lag
plot.ts(MA1)
acf(MA1)   # Cutoff after the first lag
pacf(MA1)  # Geometric decay
# BUG FIX: this plot.ts() call had been swallowed into the preceding comment
# by the document extraction, so the ARMA(1,1) series was never plotted
plot.ts(ARMA11)
acf(ARMA11)  # Geometric decay
pacf(ARMA11) # Geometric decay
# Trying with CPI dataset
#Install packages
# BUG FIX: the first line checked for "urca" but installed "TSstudio", so urca
# would never be installed; install the package that was actually checked.
if(!("urca" %in% installed.packages()[,"Package"])) install.packages("urca")
if(!("TSstudio" %in% installed.packages()[,"Package"])) install.packages("TSstudio")
#Load packages
library(tseries)
library(forecast)
library(tidyverse)
library(urca)
library(TSstudio)# Loading
# Load the monthly CPI data from CSV (readr); expects a column named CPI
cpi<-read_csv("MonthlyCPI.csv")
cpi<-ts(cpi$CPI, start=c(2000,1,5), frequency=12)
We specified the start date to be 2000 and R will automatically detect when it will end. We specified the frequency; we can see that the data is monthly, so 12.
Let’s see our series; here, unlike before where we use geom_point, etc., we simply use autoplot.
autoplot(cpi) +
ggtitle("CPI (Philippines), January 2000 to April 2020") +
labs(x = "Time", y = "CPI")
summary(cpi) # to get the mean, highest, and lowest value
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
1.000 2.875 21.100 38.772 69.650 122.600 116
Forecasting Building Blocks
Generate the ACF and PACF
ggAcf(cpi)+ggtitle("ACF of CPI")
ggPacf(cpi)+ggtitle("PACF of CPI")
dcpi<-diff(cpi)
ggAcf(as.numeric(dcpi))+ggtitle("ACF of CPI (Differenced)")
ggPacf(as.numeric(dcpi))+ggtitle("PACF of CPI (Differenced)")
ts_decompose(cpi, type="additive", showline=TRUE)
Tests for Non-stationarity
a. Augmented Dickey Fuller Test
#adf.test(cpi) #adf.test(cpi, k = 1) #adf.test(cpi, k = 2) #adf.test(dcpi) #limitation of ADF is specifying the lag order
- Phillips Perron Test
#no need to specify lag order
#pp.test(cpi)
#pp.test(dcpi)
- KPSS Test
# KPSS test: H0 is (level) stationarity -- note this is the reverse of ADF/PP.
# The p-value of 0.01 below rejects level stationarity for the CPI series.
kpss.test(cpi)
KPSS Test for Level Stationarity
data: cpi
KPSS Level = 8.7093, Truncation lag parameter = 6, p-value = 0.01
# KPSS test on the differenced CPI; the output below still rejects level
# stationarity at p = 0.01
kpss.test(dcpi)
KPSS Test for Level Stationarity
data: dcpi
KPSS Level = 4.1996, Truncation lag parameter = 6, p-value = 0.01